#https://datatables.net/reference/option/
options(DT.options = list(scrollX = TRUE, pagin=TRUE, fixedHeader = TRUE, searchHighlight = TRUE))a = read_csv('hotel_bookings.csv') %>%
clean_names() %>%
mutate(across(where(is.character), factor)) %>%
select(sort(tidyselect::peek_vars())) %>%
select(
where(is.Date),
where(is.factor),
where(is.numeric)
)| Name | a |
| Number of rows | 119390 |
| Number of columns | 32 |
| _______________________ | |
| Column type frequency: | |
| Date | 1 |
| factor | 13 |
| numeric | 18 |
| ________________________ | |
| Group variables | None |
Variable type: Date
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| reservation_status_date | 0 | 1 | 2014-10-17 | 2017-09-14 | 2016-08-07 | 926 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| agent | 0 | 1 | FALSE | 334 | 9: 31961, NUL: 16340, 240: 13922, 1: 7191 |
| arrival_date_month | 0 | 1 | FALSE | 12 | Aug: 13877, Jul: 12661, May: 11791, Oct: 11160 |
| assigned_room_type | 0 | 1 | FALSE | 12 | A: 74053, D: 25322, E: 7806, F: 3751 |
| company | 0 | 1 | FALSE | 353 | NUL: 112593, 40: 927, 223: 784, 67: 267 |
| country | 0 | 1 | FALSE | 178 | PRT: 48590, GBR: 12129, FRA: 10415, ESP: 8568 |
| customer_type | 0 | 1 | FALSE | 4 | Tra: 89613, Tra: 25124, Con: 4076, Gro: 577 |
| deposit_type | 0 | 1 | FALSE | 3 | No : 104641, Non: 14587, Ref: 162 |
| distribution_channel | 0 | 1 | FALSE | 5 | TA/: 97870, Dir: 14645, Cor: 6677, GDS: 193 |
| hotel | 0 | 1 | FALSE | 2 | Cit: 79330, Res: 40060 |
| market_segment | 0 | 1 | FALSE | 8 | Onl: 56477, Off: 24219, Gro: 19811, Dir: 12606 |
| meal | 0 | 1 | FALSE | 5 | BB: 92310, HB: 14463, SC: 10650, Und: 1169 |
| reservation_status | 0 | 1 | FALSE | 3 | Che: 75166, Can: 43017, No-: 1207 |
| reserved_room_type | 0 | 1 | FALSE | 10 | A: 85994, D: 19201, E: 6535, F: 2897 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| adr | 0 | 1 | 101.83 | 50.54 | -6.38 | 69.29 | 94.58 | 126 | 5400 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| adults | 0 | 1 | 1.86 | 0.58 | 0.00 | 2.00 | 2.00 | 2 | 55 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| arrival_date_day_of_month | 0 | 1 | 15.80 | 8.78 | 1.00 | 8.00 | 16.00 | 23 | 31 | <U+2587><U+2587><U+2587><U+2587><U+2586> |
| arrival_date_week_number | 0 | 1 | 27.17 | 13.61 | 1.00 | 16.00 | 28.00 | 38 | 53 | <U+2585><U+2587><U+2587><U+2587><U+2585> |
| arrival_date_year | 0 | 1 | 2016.16 | 0.71 | 2015.00 | 2016.00 | 2016.00 | 2017 | 2017 | <U+2583><U+2581><U+2587><U+2581><U+2586> |
| babies | 0 | 1 | 0.01 | 0.10 | 0.00 | 0.00 | 0.00 | 0 | 10 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| booking_changes | 0 | 1 | 0.22 | 0.65 | 0.00 | 0.00 | 0.00 | 0 | 21 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| children | 4 | 1 | 0.10 | 0.40 | 0.00 | 0.00 | 0.00 | 0 | 10 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| days_in_waiting_list | 0 | 1 | 2.32 | 17.59 | 0.00 | 0.00 | 0.00 | 0 | 391 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| is_canceled | 0 | 1 | 0.37 | 0.48 | 0.00 | 0.00 | 0.00 | 1 | 1 | <U+2587><U+2581><U+2581><U+2581><U+2585> |
| is_repeated_guest | 0 | 1 | 0.03 | 0.18 | 0.00 | 0.00 | 0.00 | 0 | 1 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| lead_time | 0 | 1 | 104.01 | 106.86 | 0.00 | 18.00 | 69.00 | 160 | 737 | <U+2587><U+2582><U+2581><U+2581><U+2581> |
| previous_bookings_not_canceled | 0 | 1 | 0.14 | 1.50 | 0.00 | 0.00 | 0.00 | 0 | 72 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| previous_cancellations | 0 | 1 | 0.09 | 0.84 | 0.00 | 0.00 | 0.00 | 0 | 26 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| required_car_parking_spaces | 0 | 1 | 0.06 | 0.25 | 0.00 | 0.00 | 0.00 | 0 | 8 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| stays_in_week_nights | 0 | 1 | 2.50 | 1.91 | 0.00 | 1.00 | 2.00 | 3 | 50 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| stays_in_weekend_nights | 0 | 1 | 0.93 | 1.00 | 0.00 | 0.00 | 1.00 | 2 | 19 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
| total_of_special_requests | 0 | 1 | 0.57 | 0.79 | 0.00 | 0.00 | 0.00 | 1 | 5 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
# these numeric vars s/b factor vars
a = a %>% mutate_at(vars(arrival_date_day_of_month, arrival_date_week_number, arrival_date_year, is_canceled, is_repeated_guest), factor)
# reordering df
a = a %>% select(sort(tidyselect::peek_vars())) %>%
select(
where(is.Date),
where(is.factor),
where(is.numeric)
)## [1] "2014-10-17" "2017-09-14"
a %>% count(reservation_status_date, name = 'count') %>%
arrange(reservation_status_date) %>%
plot_ly(
x = ~reservation_status_date,
y = ~count
) %>% layout(
title = 'booking counts by date',
xaxis = list(title = ''),
yaxis = list(title = '')
)## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
a %>% group_by(reservation_status_date, hotel) %>%
summarise(count = n()) %>%
arrange(reservation_status_date) %>%
plot_ly(
x = ~reservation_status_date,
y = ~count,
color = ~hotel,
alpha = 0.7
) %>% layout(
title = 'booking counts by date/hotel',
xaxis = list(title = ''),
yaxis = list(title = '')
)## `summarise()` regrouping output by 'reservation_status_date' (override with `.groups` argument)
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
a %>% group_by(reservation_status_date, customer_type) %>%
summarise(count = n()) %>%
arrange(reservation_status_date) %>%
plot_ly(
x = ~reservation_status_date,
y = ~count,
color = ~customer_type,
alpha = 0.7
) %>% layout(
title = 'booking counts by date/customer_type',
xaxis = list(title = ''),
yaxis = list(title = '')
)## `summarise()` regrouping output by 'reservation_status_date' (override with `.groups` argument)
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
a %>% group_by(reservation_status_date, deposit_type) %>%
summarise(count = n()) %>%
arrange(reservation_status_date) %>%
plot_ly(
x = ~reservation_status_date,
y = ~count,
color = ~deposit_type,
alpha = 0.7
) %>% layout(
title = 'booking counts by date/deposit_type',
xaxis = list(title = ''),
yaxis = list(title = '')
)## `summarise()` regrouping output by 'reservation_status_date' (override with `.groups` argument)
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
a %>% group_by(reservation_status_date, distribution_channel) %>%
summarise(count = n()) %>%
arrange(reservation_status_date) %>%
plot_ly(
x = ~reservation_status_date,
y = ~count,
color = ~distribution_channel,
alpha = 0.7
) %>% layout(
title = 'booking counts by date/distribution_channel',
xaxis = list(title = ''),
yaxis = list(title = '')
)## `summarise()` regrouping output by 'reservation_status_date' (override with `.groups` argument)
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Rows: 119,390
## Columns: 18
## $ agent <fct> NULL, NULL, NULL, 304, 240, 240, NULL, 30...
## $ arrival_date_day_of_month <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ arrival_date_month <fct> July, July, July, July, July, July, July,...
## $ arrival_date_week_number <fct> 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 2...
## $ arrival_date_year <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015,...
## $ assigned_room_type <fct> C, C, C, A, A, A, C, C, A, D, E, D, E, G,...
## $ company <fct> NULL, NULL, NULL, NULL, NULL, NULL, NULL,...
## $ country <fct> PRT, PRT, GBR, GBR, GBR, GBR, PRT, PRT, P...
## $ customer_type <fct> Transient, Transient, Transient, Transien...
## $ deposit_type <fct> No Deposit, No Deposit, No Deposit, No De...
## $ distribution_channel <fct> Direct, Direct, Direct, Corporate, TA/TO,...
## $ hotel <fct> Resort Hotel, Resort Hotel, Resort Hotel,...
## $ is_canceled <fct> 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,...
## $ is_repeated_guest <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ market_segment <fct> Direct, Direct, Direct, Corporate, Online...
## $ meal <fct> BB, BB, BB, BB, BB, BB, BB, FB, BB, HB, B...
## $ reservation_status <fct> Check-Out, Check-Out, Check-Out, Check-Ou...
## $ reserved_room_type <fct> C, C, A, A, A, A, C, C, A, D, E, D, D, G,...
jpal = colorRampPalette(brewer.pal(8,'Dark2'))(15)
a %>% select(where(is.factor)) %>%
map(n_unique) %>%
as.tibble() %>%
pivot_longer(everything()) %>%
plot_ly(y = ~name, x = ~value, color = ~name, colors = jpal) %>%
add_bars() %>%
hide_legend() %>%
layout(
title = 'distribution of level counts per factor',
xaxis = list(title = ''),
yaxis = list(title = '')
)## Warning: `as.tibble()` is deprecated as of tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## $agent
## [1] NULL 304 240 303 15 241 8 250 115 5 175 134 156 243 242
## [16] 3 105 40 147 306 184 96 2 127 95 146 9 177 6 143
## [31] 244 149 167 300 171 305 67 196 152 142 261 104 36 26 29
## [46] 258 110 71 181 88 251 275 69 248 208 256 314 126 281 273
## [61] 253 185 330 334 328 326 321 324 313 38 155 68 335 308 332
## [76] 94 348 310 339 375 66 327 387 298 91 245 385 257 393 168
## [91] 405 249 315 75 128 307 11 436 1 201 183 223 368 336 291
## [106] 464 411 481 10 154 468 410 390 440 495 492 493 434 57 531
## [121] 420 483 526 472 429 16 446 34 78 139 252 270 47 114 301
## [136] 193 182 135 350 195 352 355 159 363 384 360 331 367 64 406
## [151] 163 414 333 427 431 430 426 438 433 418 441 282 432 72 450
## [166] 180 454 455 59 451 254 358 469 165 467 510 337 476 502 527
## [181] 479 508 535 302 497 187 13 7 27 14 22 17 28 42 20
## [196] 19 45 37 61 39 21 24 41 50 30 54 52 12 44 31
## [211] 83 32 63 60 55 56 89 87 118 86 85 210 214 129 179
## [226] 138 174 170 153 93 151 119 35 173 58 53 133 79 235 192
## [241] 191 236 162 215 157 287 132 234 98 77 103 107 262 220 121
## [256] 205 378 23 296 290 229 33 286 276 425 484 323 403 219 394
## [271] 509 111 423 4 70 82 81 74 92 99 90 112 117 106 148
## [286] 158 144 211 213 216 232 150 267 227 247 278 280 285 289 269
## [301] 295 265 288 122 294 325 341 344 346 359 283 364 370 371 25
## [316] 141 391 397 416 404 299 197 73 354 444 408 461 388 453 459
## [331] 474 475 480 449
## 334 Levels: 1 10 103 104 105 106 107 11 110 111 112 114 115 117 118 119 ... NULL
##
## $arrival_date_day_of_month
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31
## 31 Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 ... 31
##
## $arrival_date_month
## [1] July August September October November December January
## [8] February March April May June
## 12 Levels: April August December February January July June March ... September
##
## $arrival_date_week_number
## [1] 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
## [26] 52 53 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## [51] 24 25 26
## 53 Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 ... 53
##
## $arrival_date_year
## [1] 2015 2016 2017
## Levels: 2015 2016 2017
##
## $assigned_room_type
## [1] C A D E G F I B H P L K
## Levels: A B C D E F G H I K L P
##
## $company
## [1] NULL 110 113 270 178 240 154 144 307 268 59 204 312 318 94
## [16] 174 274 195 223 317 281 118 53 286 12 47 324 342 373 371
## [31] 383 86 82 218 88 31 397 392 405 331 367 20 83 416 51
## [46] 395 102 34 84 360 394 457 382 461 478 386 112 486 421 9
## [61] 308 135 224 504 269 356 498 390 513 203 263 477 521 169 515
## [76] 445 337 251 428 292 388 130 250 355 254 543 531 528 62 120
## [91] 42 81 116 530 103 39 16 92 61 501 165 291 290 43 325
## [106] 192 108 200 465 287 297 490 482 207 282 437 225 329 272 28
## [121] 77 338 72 246 319 146 159 380 323 511 407 278 80 403 399
## [136] 14 137 343 346 347 349 289 351 353 54 99 358 361 362 366
## [151] 372 365 277 109 377 379 22 378 330 364 401 232 255 384 167
## [166] 212 514 391 400 376 402 396 302 398 6 370 369 409 168 104
## [181] 408 413 148 10 333 419 415 424 425 423 422 435 439 442 448
## [196] 443 454 444 52 459 458 456 460 447 470 466 484 184 485 32
## [211] 487 491 494 193 516 496 499 29 78 520 507 506 512 126 64
## [226] 242 518 523 539 534 436 525 541 40 455 410 45 38 49 48
## [241] 67 68 65 91 37 8 179 209 219 221 227 153 186 253 202
## [256] 216 275 233 280 309 321 93 316 85 107 350 279 334 348 150
## [271] 73 385 418 197 450 452 115 46 76 96 100 105 101 122 11
## [286] 139 142 127 143 140 149 163 160 180 238 183 222 185 217 215
## [301] 213 237 230 234 35 245 158 258 259 260 411 257 271 18 106
## [316] 210 273 71 284 301 305 293 264 311 304 313 288 320 314 332
## [331] 341 352 243 368 393 132 220 412 420 426 417 429 433 446 357
## [346] 479 483 489 229 481 497 451 492
## 353 Levels: 10 100 101 102 103 104 105 106 107 108 109 11 110 112 113 ... NULL
##
## $country
## [1] PRT GBR USA ESP IRL FRA NULL ROU NOR OMN ARG POL DEU BEL CHE
## [16] CN GRC ITA NLD DNK RUS SWE AUS EST CZE BRA FIN MOZ BWA LUX
## [31] SVN ALB IND CHN MEX MAR UKR SMR LVA PRI SRB CHL AUT BLR LTU
## [46] TUR ZAF AGO ISR CYM ZMB CPV ZWE DZA KOR CRI HUN ARE TUN JAM
## [61] HRV HKG IRN GEO AND GIB URY JEY CAF CYP COL GGY KWT NGA MDV
## [76] VEN SVK FJI KAZ PAK IDN LBN PHL SEN SYC AZE BHR NZL THA DOM
## [91] MKD MYS ARM JPN LKA CUB CMR BIH MUS COM SUR UGA BGR CIV JOR
## [106] SYR SGP BDI SAU VNM PLW QAT EGY PER MLT MWI ECU MDG ISL UZB
## [121] NPL BHS MAC TGO TWN DJI STP KNA ETH IRQ HND RWA KHM MCO BGD
## [136] IMN TJK NIC BEN VGB TZA GAB GHA TMP GLP KEN LIE GNB MNE UMI
## [151] MYT FRO MMR PAN BFA LBY MLI NAM BOL PRY BRB ABW AIA SLV DMA
## [166] PYF GUY LCA ATA GTM ASM MRT NCL KIR SDN ATF SLE LAO
## 178 Levels: ABW AGO AIA ALB AND ARE ARG ARM ASM ATA ATF AUS AUT AZE BDI ... ZWE
##
## $customer_type
## [1] Transient Contract Transient-Party Group
## Levels: Contract Group Transient Transient-Party
##
## $deposit_type
## [1] No Deposit Refundable Non Refund
## Levels: No Deposit Non Refund Refundable
##
## $distribution_channel
## [1] Direct Corporate TA/TO Undefined GDS
## Levels: Corporate Direct GDS TA/TO Undefined
##
## $hotel
## [1] Resort Hotel City Hotel
## Levels: City Hotel Resort Hotel
##
## $is_canceled
## [1] 0 1
## Levels: 0 1
##
## $is_repeated_guest
## [1] 0 1
## Levels: 0 1
##
## $market_segment
## [1] Direct Corporate Online TA Offline TA/TO Complementary
## [6] Groups Undefined Aviation
## 8 Levels: Aviation Complementary Corporate Direct Groups ... Undefined
##
## $meal
## [1] BB FB HB SC Undefined
## Levels: BB FB HB SC Undefined
##
## $reservation_status
## [1] Check-Out Canceled No-Show
## Levels: Canceled Check-Out No-Show
##
## $reserved_room_type
## [1] C A D E G F H L P B
## Levels: A B C D E F G H L P
## Rows: 119,390
## Columns: 13
## $ adr <dbl> 0.00, 0.00, 75.00, 75.00, 98.00, 98....
## $ adults <dbl> 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, ...
## $ babies <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ booking_changes <dbl> 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ children <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ days_in_waiting_list <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ lead_time <dbl> 342, 737, 7, 13, 14, 14, 0, 9, 85, 7...
## $ previous_bookings_not_canceled <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ previous_cancellations <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ required_car_parking_spaces <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ stays_in_week_nights <dbl> 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, ...
## $ stays_in_weekend_nights <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ total_of_special_requests <dbl> 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, ...